﻿// *********************************************************
// 
//     Copyright (c) Microsoft. All rights reserved.
//     This code is licensed under the Apache License, Version 2.0.
//     THIS CODE IS PROVIDED *AS IS* WITHOUT WARRANTY OF
//     ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING ANY
//     IMPLIED WARRANTIES OF FITNESS FOR A PARTICULAR
//     PURPOSE, MERCHANTABILITY, OR NON-INFRINGEMENT.
// 
// *********************************************************
using System;
using System.Collections.Generic;
using System.Threading.Tasks;
using Bio.SimilarityMatrices;

namespace Bio.Algorithms.Alignment.MultipleSequenceAlignment
{
    /// <summary>
    /// Implementation of Progressive Aligner (PA) class.
    /// 
    /// PA follows the binary guide tree (generated by Hierarchical Clustering algorithm)
    /// in a prefix order (children before parents).
    /// 
    /// It starts from the closest pair of sequences of leaves, proceeds to parent nodes,
    /// and terminates when the root is aligned. A multiple sequence alignment (MSA) is 
    /// then generated. The tree node alignment order is the same as they are clustered (in
    /// the 'Nodes' list).
    /// 
    /// The sequences in the tree leaves are converted to profiles so that the alignment of
    /// leaves and internal nodes are done by profile-profile pair-wise alignement algorithm.
    /// When aligning two children nodes in the tree, a profile of the alignment is generated 
    /// and assigned to the parent node. The alignment operation on the children nodes is 
    /// recorded into eStrings and assigned to the children nodes, so that the sequence data 
    /// is not modified until the whole tree is aligned. Then the alignment operation of each
    /// leaf sequence is applied by applying eStrings of the nodes from the leaf node to the 
    /// root alone the tree path.
    /// 
    /// Profile-profile alignment is done by ProfileAligner class. Same as pairwise aligner,
    /// profile-profile aligner also requires the definition of gap panelty, score of two 
    /// symbol (profile vector in this case, instead of Similarity Matrix).
    /// 
    /// </summary>
    public class ProgressiveAligner : IProgressiveAligner
    {
        #region Member variable

        // Aligned sequences. The sequence index order is the same as in input.
        private List<ISequence> _alignedSequences = null;

        // The aligner that aligns two profiles.
        private IProfileAligner _profileAligner = null;

        // The two nodes that are currently aligned.
        private BinaryGuideTreeNode _nodeA = null, _nodeB = null;

        /// <summary>
        /// The aligned sequences in this class
        /// </summary>
        public List<ISequence> AlignedSequences 
        { 
            get { return _alignedSequences; } 
        }

        /// <summary>
        /// The name of this progressive aligner.
        /// </summary>
        public string Name
        {
            get { return Resource.ProgressiveAlignerName; }
        }
        #endregion

        #region Constructors
        /// <summary>
        /// Construct a progressive aligner
        /// </summary>
        /// <param name="profileAlignerName">ProfileAlignerNames member</param>
        /// <param name="similarityMatrix">similarity matrix</param>
        /// <param name="gapOpenPenalty">negative gapOpenPenalty</param>
        /// <param name="gapExtendPenalty">negative gapExtendPenalty</param>
        public ProgressiveAligner(ProfileAlignerNames profileAlignerName,
                                  SimilarityMatrix similarityMatrix,
                                  int gapOpenPenalty,
                                  int gapExtendPenalty)
        {
            // Get ProfileAligner ready
            switch (profileAlignerName)
            {
                case (ProfileAlignerNames.NeedlemanWunschProfileAligner):
                    _profileAligner = new NeedlemanWunschProfileAlignerSerial();
                    break;
                case (ProfileAlignerNames.SmithWatermanProfileAligner):
                    _profileAligner = new SmithWatermanProfileAlignerSerial();
                    break;
                default:
                    throw new Exception("Invalid profile aligner name");
            }
            
            _profileAligner.SimilarityMatrix = similarityMatrix;
            _profileAligner.GapOpenCost = gapOpenPenalty;
            _profileAligner.GapExtensionCost = gapExtendPenalty;

            _alignedSequences = new List<ISequence>();
        }

        /// <summary>
        /// Construct a progressive aligner with a default profile aligner
        /// </summary>
        /// <param name="profileAligner">profile aligner interface that aligns two profiles</param>
        public ProgressiveAligner(IProfileAligner profileAligner)
        {
            // Get ProfileAligner ready
            _profileAligner = profileAligner;

            _alignedSequences = new List<ISequence>();
        }
        #endregion

        #region Methods
        /// <summary>
        /// Main pregressive alignment algorithm aligns a set of sequences guided by
        /// a binary tree. 
        /// </summary>
        /// <param name="sequences">input sequences</param>
        /// <param name="tree">a binary guide tree</param>
        public void Align(IList<ISequence> sequences, BinaryGuideTree tree)
        {
            SequenceWeighting sequenceWeighting = null;
            if (PAMSAMMultipleSequenceAligner.UseWeights)
            {

                sequenceWeighting = new SequenceWeighting(tree);
                /*
                for (int i = 0; i < sequenceWeighting.Weights.Length; ++i)
                {
                    sequenceWeighting.Weights[i] = 1;
                }
                */
            }

            if (sequences.Count==0)
            {
                throw new ArgumentException("Empty set of sequences");
            }
            IAlphabet alphabet = sequences[0].Alphabet;

            Parallel.For(1, sequences.Count, PAMSAMMultipleSequenceAligner.parallelOption, i =>
            {
                if (!Alphabets.CheckIsFromSameBase(sequences[i].Alphabet, alphabet))
                {
                    throw new ArgumentException("Inconsistent sequence alphabet");
                }
            });

            if (PAMSAMMultipleSequenceAligner.UseWeights)
            {
                // Generate profile for leaf nodes
                Parallel.For(0, sequences.Count, PAMSAMMultipleSequenceAligner.parallelOption, i =>
                {
                    tree.Nodes[i].ProfileAlignment = ProfileAlignment.GenerateProfileAlignment(sequences[i], sequenceWeighting.Weights[i]);
                    tree.Nodes[i].Weight = sequenceWeighting.Weights[i];
                });
            }
            else
            {
                // Generate profile for leaf nodes
                Parallel.For(0, sequences.Count, PAMSAMMultipleSequenceAligner.parallelOption, i =>
                {
                    tree.Nodes[i].ProfileAlignment = ProfileAlignment.GenerateProfileAlignment(sequences[i]);
                });
            }
            
            // Iterate internal nodes; 
            // as defined in the tree, the last node is the root
            for (int i = sequences.Count; i < tree.Nodes.Count; ++i)
            {
                if (tree.Nodes[i].NeedReAlignment)
                {
                    // pull out its children
                    _nodeA = tree.Nodes[i].LeftChildren;
                    _nodeB = tree.Nodes[i].RightChildren;

                    if (PAMSAMMultipleSequenceAligner.UseWeights)
                    {
                        _profileAligner.Weights = new float[2];
                        _profileAligner.Weights[0] = _nodeA.Weight;
                        _profileAligner.Weights[1] = _nodeB.Weight;

                        tree.Nodes[i].Weight = _nodeA.Weight + _nodeB.Weight;
                    }

                    // align two profiles
                    ProfileAlignment result = null;
                    if (_nodeA.ProfileAlignment.NumberOfSequences < _nodeB.ProfileAlignment.NumberOfSequences)
                    {
                        result = (ProfileAlignment)_profileAligner.Align(
                                                    _nodeA.ProfileAlignment, _nodeB.ProfileAlignment);
                        // assign aligned profiles to the current internal node
                        tree.Nodes[i].ProfileAlignment = result;

                        // generate eString for the children nodes
                        _nodeA.EString = _profileAligner.GenerateEString(_profileAligner.AlignedA);
                        _nodeB.EString = _profileAligner.GenerateEString(_profileAligner.AlignedB);
                    }
                    else
                    {
                        result = (ProfileAlignment)_profileAligner.Align(
                                                    _nodeB.ProfileAlignment, _nodeA.ProfileAlignment);
                        // assign aligned profiles to the current internal node
                        tree.Nodes[i].ProfileAlignment = result;

                        // generate eString for the children nodes
                        _nodeA.EString = _profileAligner.GenerateEString(_profileAligner.AlignedB);
                        _nodeB.EString = _profileAligner.GenerateEString(_profileAligner.AlignedA);
                    }


                    // children node profiles can be deleted
                    _nodeA.ProfileAlignment.Clear();
                    _nodeB.ProfileAlignment.Clear();
                }
            }

            // Convert original unaligned sequences to aligned ones by applying alignment paths in eStrings
            try
            {
                _alignedSequences = new List<ISequence>(sequences.Count);
            }
            catch (OutOfMemoryException ex)
            {
                throw new Exception("Out of memory", ex.InnerException);
            }

            for (int i=0; i<sequences.Count;++i)
            {
                _alignedSequences.Add(null);
            }

            Parallel.For(0, sequences.Count, PAMSAMMultipleSequenceAligner.parallelOption, i =>
            {
                ISequence seq = sequences[i];
                BinaryGuideTreeNode node;
                node = tree.Nodes[i];
                while (!node.IsRoot)
                {
                    seq = _profileAligner.GenerateSequenceFromEString(node.EString, seq);
                    node = node.Parent;
                }
                _alignedSequences[i] = seq;
            });
        }
        #endregion
    }
}
